#####################################
# ANALYSIS HELPER FUNCTIONS 
# Functions that make life easier
# 
# Part of replication of:
# Traditional Institutions in Africa, Past and Present
#
# Political Science Research and Methods
#
# By Clara Neupert-Wentz and Carl Müller-Crepon, 2023
#
##########################################



# (function) Latex entry for FStat with multiple instruments
latex.fstat.multi <- function(model.ls, f.id, name, condfstat.type){
  latex.addline(name, unlist(lapply(model.ls, function(m){
    f <- try(condfstat(m, type = condfstat.type)[,f.id])
    if(class(f) == "try-error"){ "" } else { as.character(round_any(f, 0.01))}
  })))
} 

# (function) Add line to latex table

# ... with multicolumn
latex.addline <- function(title,entries){c(title,paste0("\\multicolumn{1}{c}{",entries,"}"))}

# ... no multicolumn
latex.addline_nomc <- function(title,entries){c(title,entries)}

# (function) Mean of dependent variable for latex table
latex.mean.dv <- function(model.ls){
  c("Mean DV:",paste0("\\multicolumn{1}{c}{",signif(unlist(lapply(model.ls, function(m){mean(m$response)})), 2),"}"))
}

# (function) SD of dependent variable for latex table
latex.mean.sd <- function(model.ls){
  c("Std.-dev. DV:",paste0("\\multicolumn{1}{c}{",signif(unlist(lapply(model.ls, function(m){sd(m$response)})), 2),"}"))
}

# (function) Extract all coefficients from a list of felm models
extract_coef <- function(m.list){
  lapply(m.list, function(m){
    list(dv = colnames(m$coefficients),
         coef = m$coefficients,
         clustervcv = m$clustervcv)
  })
}

# (function) helper function to combine
make_form <- function(dv, expl, fe, iv = "0", se = "0" ){
  if(is.null(expl)){
    expl <- "0"
  }
  as.formula(paste(dv, "~", paste(expl, collapse = "+"), "|",
                   paste(fe, collapse = "+"), "|", iv, "|", se))
}

# (function) Generate Regression weights
gen_weights <- function(vec){
  tab <- table(vec)
  weights <- data.frame(cbind(weight = as.numeric(tab), unit = as.character(names(tab))),
                        stringsAsFactors = F)
  weight.vec <- 1/as.numeric(join(data.frame(unit = as.character(vec), stringsAsFactors = F),
                                  weights, type = "left", by = "unit")[,2])
  return(weight.vec)
  
}

# (function) Make coefficients with SE to nice text
effect_text <- function(coef, se, trans_fun = NULL, accuracy = 1){
  if(is.null(trans_fun)){
    paste0(signif(coef,accuracy),
           " [", signif(coef - se*1.95,accuracy),", ", 
           signif(coef + se*1.95,accuracy), "]")
  } else {
    paste0(signif(trans_fun(coef),accuracy),
           " [", signif(trans_fun(coef - se*1.95),accuracy),", ", 
           signif(trans_fun(coef + se*1.95),accuracy), "]")
  }
}


# (function) Make stargazer columnlabels with one extra layer and rulers
collab_w_ruler <- function(column.labels, column.separate, trim = 10, add.below = NULL){
  cmidrule <- paste(paste0("\\cmidrule(lr{",trim,"pt}){", c(2, 2 + cumsum(column.separate[-length(column.separate)])), "-",
                           c(1+ cumsum(column.separate)), "}"), collapse = " ")
  if(is.null(add.below)){
    result <-  c(column.labels[-length(column.labels)], 
                 paste0(column.labels[length(column.labels)], "} \\\\ ", cmidrule, " \\\\[-6ex] {"))
  } else {
    add.b <- paste(paste0(" & \\multicolumn{1}{c}{", add.below, "}"), collapse = "")
    result <-  c(column.labels[-length(column.labels)], 
                 paste0(column.labels[length(column.labels)], "} \\\\ ", cmidrule, add.b, " {"))
  }
  
  return(result)
}

# (function) PCA to Latex
pca_gazer <- function(pca.res, var.labs = NULL, digits = 3, title = "PCA", label = "pca", col.sep = "0pt"){
  
  # Check valid prcomp object
  stopifnot(inherits(pca.res, "prcomp"))
  
  
  # Make Numeric matrix
  sum.mat <- rbind(pca.res$sdev,
                   pca.res$sdev^2 / sum(pca.res$sdev^2),
                   cumsum(pca.res$sdev^2 / sum(pca.res$sdev^2)))
  cov.mat <- pca.res$rotation
  
  num.mat <- rbind(rep(NA, ncol(sum.mat)),
                   sum.mat,
                   rep(NA, ncol(sum.mat)),
                   cov.mat)
  num.mat <- round(num.mat, digits)
  num.mat <- apply(num.mat, 2, sprintf, fmt = "%.3f")
  num.mat[num.mat == "NA"] <- ""
  
  # Variable labels
  if(is.null(var.labs)){
    var.labs <- rownames(cov.mat)
  }
  sum.labs <- c("Standard deviation", "Proportion of Variance", "Cumulative Proportion")
  all.labs <- c("Summary statistics:",
                paste0("\\quad ", sum.labs),
                " \\\\[-1.8ex]\\hline \\\\[-1.8ex] Factor loadings:",
                paste0("\\quad ", var.labs))
  
  
  # Make table
  tab.head <- paste0("\\begin{table}[!htbp] \\centering ",
                     "\\caption{", title, "}", 
                     "\\label{", label,"} ",
                     "\\begin{tabular}{@{\\extracolsep{",col.sep,"}}l",paste(rep(paste0("D{.}{.}{-", digits,"}"), nrow(cov.mat)), collapse = ""),
                     "} \\\\[-1.8ex]\\hline ",
                     "\\hline \\\\[-1.8ex] ",
                    " & \\multicolumn{",nrow(cov.mat),"}{c}{Component} \\\\  \\\\[-1.8ex] ",
                     "\\cline{2-",nrow(cov.mat)+1,"} ",
                     "\\\\[-1.8ex]",paste(paste0("& \\multicolumn{1}{c}{(", 1:ncol(num.mat),")}"), collapse = " ") ,
                    " \\\\ \\\\[-1.8ex]  \\hline \\\\[-1.8ex] "
                     )
  tab.main <- paste(apply(data.frame(all.labs, num.mat), 1, paste, collapse = " & "), collapse = "\\\\ \n ")
  tab.foot <- "\\\\[-1.8ex]  \\\\ \\hline \\hline \\\\[-1.8ex] \\end{tabular}  \\end{table} "
  
  # Return
  return(paste(c(tab.head, tab.main, tab.foot), collapse = " \n "))
  
  
}


## Summarizes data.
## Gives count, mean, standard deviation, standard error of the mean, and confidence interval (default 95%).
##   data: a data frame.
##   measurevar: the name of a column that contains the variable to be summariezed
##   groupvars: a vector containing names of columns that contain grouping variables
##   na.rm: a boolean that indicates whether to ignore NA's
##   conf.interval: the percent range of the confidence interval (default is 95%)
## From  http://www.cookbook-r.com/Manipulating_data/Summarizing_data/
summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
                      conf.interval=.95, .drop=TRUE) {
  require(plyr)
  
  # New version of length which can handle NA's: if na.rm==T, don't count them
  length2 <- function (x, na.rm=FALSE) {
    if (na.rm) sum(!is.na(x))
    else       length(x)
  }
  
  # This does the summary. For each group's data frame, return a vector with
  # N, mean, and sd
  datac <- ddply(data, groupvars, .drop=.drop,
                 .fun = function(xx, col) {
                   c(N    = length2(xx[[col]], na.rm=na.rm),
                     mean = mean   (xx[[col]], na.rm=na.rm),
                     sd   = sd     (xx[[col]], na.rm=na.rm)
                   )
                 },
                 measurevar
  )
  
  # Rename the "mean" column    
  datac <- plyr::rename(datac, c("mean" = measurevar))
  
  datac$se <- datac$sd / sqrt(datac$N)  # Calculate standard error of the mean
  
  # Confidence interval multiplier for standard error
  # Calculate t-statistic for confidence interval: 
  # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
  ciMult <- qt(conf.interval/2 + .5, datac$N-1)
  datac$ci <- datac$se * ciMult
  
  return(datac)
}







